package dmp.cjh.url.domain;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.log4j.Logger;

/**
 * Mapper over pipe-delimited text records.
 *
 * <p>For each input line, the field at zero-based index {@value #DOMAIN_FIELD_INDEX} is
 * passed to {@code URLUtils.regexpExtract} together with {@link #DOMAIN_REGEX}; the
 * extracted string is emitted as the output key with a count of {@code 1}.
 *
 * <p>Records that have too few fields, or for which the extraction yields {@code null},
 * are skipped and tallied in job counters instead of failing the task.
 */
public class DomainGetMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
	// Kept public and non-final so existing external references to this field keep working.
	public static Logger logger1 = Logger.getLogger(DomainGetMapper.class);

	/** Zero-based index of the pipe-delimited field that is fed to the extraction regex. */
	private static final int DOMAIN_FIELD_INDEX = 28;

	/**
	 * Pattern handed to {@code URLUtils.regexpExtract}.
	 *
	 * <p>NOTE(review): the alternation is top-level, so this reads as
	 * "{@code ([^.]*).com} at end of input" OR "{@code .com.cn} anywhere"; the second
	 * branch has no capture group and no end anchor. Confirm this is the intended
	 * behaviour before changing it.
	 */
	private static final String DOMAIN_REGEX = "([^\\.]*)\\.com$|\\.com\\.cn";

	/** Counter group used for records this mapper skips. */
	private static final String COUNTER_GROUP = "DomainGetMapper";

	// Writables are reused across map() calls to avoid allocating two objects per record.
	private final Text outKey = new Text();
	private final LongWritable one = new LongWritable(1);

	/**
	 * Emits {@code (extracted, 1)} for one input line.
	 *
	 * @param key     input key supplied by the framework (not used)
	 * @param value   one pipe-delimited input line
	 * @param context task context used to emit output and update counters
	 * @throws IOException          if writing the output record fails
	 * @throws InterruptedException if the task is interrupted while writing
	 */
	@Override
	protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
		String[] fields = value.toString().split("\\|");

		// Guard against short/malformed lines; indexing blindly would throw
		// ArrayIndexOutOfBoundsException and fail the whole task.
		if (fields.length <= DOMAIN_FIELD_INDEX) {
			context.getCounter(COUNTER_GROUP, "SHORT_RECORDS").increment(1);
			return;
		}

		String extracted = URLUtils.regexpExtract(fields[DOMAIN_FIELD_INDEX], DOMAIN_REGEX);

		// Defensive: Text cannot hold null. Whether regexpExtract can actually return
		// null is not visible from this file -- TODO confirm against URLUtils.
		if (extracted == null) {
			context.getCounter(COUNTER_GROUP, "NO_MATCH").increment(1);
			return;
		}

		outKey.set(extracted);
		context.write(outKey, one);
	}
}
